* Start from a clean session: discard stored estimation results, then
* clear the data and other objects held in memory
estimates clear
clear all

/*----------------------------------------------------------------
	Replicate tables and appendix material from:
	
		Casey, Glennerster, Miguel, and Voors, 
		"Skill Versus Voice in Local Development" 
		
	NOTE: Running this file requires installation of several user written commands
	
	This do file uses the following de-identified datasets:
	- public_contentproposals.dta
	- public_expscore.dta
	- public_man_capital.dta
	- public_mancap_results.dta
	- public_manager_selection.dta
	- public_policyscore.dta
	- public_submission_matched.dta
	- public_submission_survey.dta
	- public_sullay_data.dta
	- public_techscore.dta
	- public_vill_overview.dta
	
	And the following file from the replication files for the paper:
		Casey, Glennerster, and Miguel, "Reshaping Institutions: Evidence on
		Aid Impacts Using a Preanalysis Plan"
		
	- gbf_analysis.dta
	See: https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/21708
	
	And generates the following outputs for tables in the main text and appendix:
	- Table 1: table1.rtf
	- Table 2: table2A.xlsx, table2B.xlsx
	- Table 3: table3.rtf
	- Appendix Table A1: tableA1.rtf
	- Appendix Table A2: tableA2.rtf
	- Appendix Table A3: tableA3.rtf
	- Appendix Table A4: tableA4.rtf
	- Appendix Table A5: tableA5.rtf
	- Appendix Table A6: tableA6.rtf
	- Appendix Table A7: tableA7.rtf
	- Appendix Table A8: tableA8.rtf
	- Appendix Table A9: tableA9.rtf
	- Appendix Table A10: tableA10.rtf
	- Appendix Table A11: tableA11.csv
	- Appendix Table A12: tableA12.rtf
	- Appendix Table A13: tableA13.rtf

	and following outputs for figures in the main text and appendix: 
	
	- Figure 2: fig2.png
	- Figure 3: fig3.png
	- Figure 4: fig4A.png, fig4B.png
	- Appendix Figure A1: figA1A.png, figA1B.png
	- Appendix Figure A2: figA2.png
	
	In-text estimates are reported, and are flagged as "IN-TEXT ESTIMATE"
	
	last modified 22 June 2021
------------------------------------------------------------------------------*/

*** START HERE: set your working directory to where you have saved the folder on your device:
*cd "[INSERT YOUR LOCATION HERE]"
* Fix the random-number seed for reproducibility
set seed 500

*** Uncomment if you have not installed these user-written commands
* ssc install catplot
* ssc install splitvallabels
* ssc install estout   // provides eststo, esttab and estadd, used for the regression tables below


***************************
*   PART I: FORMAT DATA   *
***************************

********************************************************************************
******************* Merge Multiple Surveys Into One Dataset ********************
********************************************************************************


**************
* Village-level information on treatment assignment
**************

use "public_vill_overview.dta", clear

	* The three pilot villages have no village id; remove them
	drop if id_vill == .


**************
* Expert Scoring of Proposals, see instrument: "Expert Scoring_actual"
**************
merge 1:1 id_vill using "public_expscore.dta", nogenerate

* Village-level averages of the expert scores, built on a side copy of the
* expert data and written to a temporary file on disk
preserve
	use "public_expscore.dta", clear

	* Store both experts' comment fields as strings
	foreach k in 1 2 {
		tostring esc*comment_`k', replace
	}

	* The data hold one set of columns per expert (wide). Collect the reshape
	* stubs by stripping the last two characters from each esc_*_1 name
	unab expert1_vars : esc_*_1
	foreach v of local expert1_vars {
		local stublist `stublist' `=substr("`v'",1,length("`v'")-2)'
	}

	* One row per village-expert, then the within-village mean of each item
	reshape long `stublist', i(id_vill district proposal_id vill_name) j(expert) string
	local items esc_I_a esc_I_b_1 esc_I_b_2 esc_I_b_3 esc_I_b_4 esc_I_b_5 esc_II_a esc_II_b esc_III_a esc_III_b esc_III_c
	foreach item of local items {
		egen `item'_mean = mean(`item'), by(id_vill)
	}

	* Keep identifiers plus the means, one row per village
	keep id_vill district proposal_id vill_name *mean
	duplicates drop id_vill, force
	save "temp_expscore_means.dta", replace
restore

* Attach the village means to the main data
merge 1:1 id_vill using "temp_expscore_means.dta", nogenerate


**************
* Remaining village-level sources, merged 1:1 on id_vill in this order:
*   public_policyscore       - Government Scoring of Proposals
*   public_techscore         - Coding of Project Completeness,
*                              see instrument: "Technical scoring_actual"
*   public_submission_survey - Data from original proposal submissions,
*                              see instrument: "Submission Survey_actual"
*                              (non-merging observations had missing data or
*                              did not hand in proposal_id)
*   public_manager_selection - Observation of village proposal deliberation,
*                              see instruments:
*                              "Manager selection - tally sheet enumerator A_actual" and
*                              "Manager selection - tally sheet enumerator B_actual"
**************
foreach src in public_policyscore public_techscore public_submission_survey public_manager_selection {
	merge 1:1 id_vill using "`src'.dta", nogenerate
}



********************************************************************************
************************** Create Variables and Tidy ***************************
********************************************************************************


**************
* Primary Outcomes
**************
*#* Technical Score

*#* Expert Score
* Average of the two expert totals
egen expscore = rowmean(esc_total_2 esc_total_1)
* Fall back on the first scorer's total where the average is still missing
replace expscore = esc_total_1 if missing(expscore)
label variable expscore "Expert score (0-100)"

*#* Gov't Score
egen policyscore = rowmean(scorer1 scorer7 scorer8 scorer9 scorer10 scorer2 scorer3 scorer4 scorer5 scorer6)
label variable policyscore "Policy score (0-100)"

*#* Won Grant
* Flag the known winning villages by id
generate winner = 0
replace winner = 1 if inlist(id_vill, 3, 7, 16, 44, 49, 88, 103, 108, 110, 122) | ///
	inlist(id_vill, 146, 147, 250, 176, 178, 206, 210, 216, 221, 232, 213)
label variable winner "Indicator: Winning Proposal"
replace winner = . if expscore==. // Did not submit


**************
* Other Prep Work
**************
* Variables not needed from here on
drop a8 a1 a5 a2 a3

* Non-submitters did not win
replace winner = 0 if missing(winner)

* meanscore was used to allocate winners (does not necessarily coincide with policyscore)
destring meanscore, replace


**************
* Standardization Steps w/ Proposal Score Indices
**************

* Standardize each score against Arm 1 (SQ==1 & t1==0, i.e. status quo with no
* original CDD treatment), impute missing values, then average into an index
local idx = 0
foreach score of varlist techscore meanscore expscore {
	local ++idx

	* Arm 1 mean and SD, held in (float) variables
	quietly summarize `score' if SQ==1 & t1==0
	generate `score'_mean = r(mean)
	generate `score'_sd = r(sd)

	* z-score relative to Arm 1
	quietly generate std_`idx' = (`score' - `score'_mean) / `score'_sd

	* For each non-reference (SQ, t1) cell: compute the cell mean of the
	* z-score and use it to fill that cell's missing values
	foreach arm in "1 1" "0 1" "0 0" {
		tokenize `arm'
		egen mean_std_`idx'_sq`1'cdd`2' = mean(std_`idx') if SQ==`1' & t1==`2'
		replace std_`idx' = mean_std_`idx'_sq`1'cdd`2' if SQ==`1' & t1==`2' & missing(std_`idx')
	}

	* Reference arm: its mean z-score is 0 by construction
	replace std_`idx' = 0 if SQ==1 & t1==0 & missing(std_`idx')
	drop `score'_mean `score'_sd mean_std*
}

* Naming (std_2 comes from meanscore)
rename std_1 techscore_cddsq
rename std_2 policyscore_cddsq
rename std_3 expscore_cddsq

*#* Proposal Score Index (Outcome) [For SQ & CDD scaling]
egen zscore_HII_cddsq = rowmean(techscore_cddsq policyscore_cddsq expscore_cddsq)
label variable zscore_HII_cddsq "Proposal Score (index)"

* Alternative standardization: scale to all status quo villages (SQ==1),
* ignoring the CDD treatment
local idx = 0
foreach score of varlist techscore meanscore expscore {
	local ++idx

	* Status quo mean and SD, held in (float) variables
	quietly summarize `score' if SQ==1
	generate `score'_mean = r(mean)
	generate `score'_sd = r(sd)

	* z-score relative to the status quo group
	quietly generate std_`idx' = (`score' - `score'_mean) / `score'_sd

	* Missing values outside the status quo group take that group's mean z-score
	egen mean_std_`idx'_t = mean(std_`idx') if SQ==0
	replace std_`idx' = mean_std_`idx'_t if SQ==0 & missing(std_`idx')

	* Missing values inside the status quo group take 0, its mean by construction
	replace std_`idx' = 0 if SQ==1 & missing(std_`idx')
	drop `score'_mean `score'_sd mean_std_`idx'_t
}

* Naming (std_2 comes from meanscore)
rename std_1 techscore_sq
rename std_2 policyscore_sq
rename std_3 expscore_sq

*#* Proposal Score Index (Outcome) [For SQ scaling]
egen zscore_HII_sq = rowmean(techscore_sq policyscore_sq expscore_sq)
label variable zscore_HII_sq "Means Index"


**************
* Final cleaning & generation steps
**************

* Variable labels used in tables and figures
* (both scalings of each score share the same label)
label variable techscore_cddsq "Technical Score"
label variable techscore_sq "Technical Score"
label variable policyscore_cddsq "Gov't Score"
label variable policyscore_sq "Gov't Score"
label variable expscore_cddsq "Expert Score"
label variable expscore_sq "Expert Score"
label variable zscore_HII_cddsq "Proposal Score (index)"
label variable zscore_HII_sq "Proposal Score (index)"
label variable winner "Won a Grant"
label variable MS "Technocratic Selection"
label variable TR "Training"
label variable t1 "CDD"

* Interaction of the two treatment indicators
generate MS_t1 = MS*t1
label variable MS_t1 "Technocratic Selection * CDD"

* Working dataset for the analysis
save "analysis.dta", replace



********************************************************************************
************************ Clean Managerial Capital Data *************************
********************************************************************************
* Load the managerial capital data, attach treatment assignment, and build
* the respondent characteristics used later; result saved as mc_cap.dta
use "public_man_capital.dta", clear

* Merge in treatment data
* (the village overview minus rows without a village id is written to a
*  temporary file on disk first)
preserve
	use "public_vill_overview.dta", clear
	drop if id_vill==. 
	save "temp_vill_overview.dta", replace
restore
merge m:1 id_vill using "temp_vill_overview.dta"
	ren _merge _merge_t
	

*  MS nominee pool vs chiefs – age, education, gender, other from test data
g chief = 0
	replace chief = 1 if status == 1 
g age = s1_3
g male = s1_4
	recode male (2=0)
* Any education: 0 when s1_7 is 0, 1 when s1_7 is any other non-missing value.
* A bare "s1_7 != 0" is also true for missing values in Stata, which would
* code respondents with no education answer as educated; leave them missing.
g anyeduc = 0 if s1_7 ==0
	replace anyeduc = 1 if s1_7 !=0 & !missing(s1_7)
g borninv = 0
	replace borninv = 1 if s1_12a== 1
g ruling = 0
	replace ruling = 1 if s1_11== 1
* Occupation indicators from the s1_8 codes
g farmer = 0
	replace farmer = 1 if s1_8 == 1
g teacher = 0
	replace teacher = 1 if s1_8 == 26
g business = 0
	replace business = 1 if s1_8 == 11 | s1_8 == 13 | s1_8 == 16 | s1_8 == 22	
	
* Save
save "mc_cap.dta", replace
	
	
********************************************************************************
******************* Tidy Up: Temp files from data formatting *******************
********************************************************************************
sleep 1000 //In case still writing to disk
* Remove the intermediate files written during data formatting
foreach tmp in temp_expscore_means temp_vill_overview {
	erase "`tmp'.dta"
}
	
	
	
	
	

	
***************************
*    PART II: ANALYSIS    *
***************************
use "analysis.dta", clear


	///////////////
	// IN-TEXT ESTIMATE: 
		*As we show later, 98% of villages entered the grants competition, which provides revealed preference evidence that communities found this a worthwhile opportunity.
	//////////////

	* Share of villages with a proposal id
	count if missing(proposal_id)
	local n_nosubmit = r(N)
	display 1 - (`n_nosubmit'/_N)


	///////////////
	// IN-TEXT ESTIMATE: 
	* Note that we do not examine effects on entry into the competition as we originally intended, as nearly all study villages (232 out of 236) submitted a proposal, affording minimal variation to examine
	//////////////

	* Number of villages with a proposal id
	count if missing(proposal_id)
	display _N - r(N)

	///////////////
	// IN-TEXT ESTIMATE: Footnote 9
	* Submission rates are statistically balanced across treatment arms and range from 97 to 100 percent.
	//////////////

	* Regress a submission indicator on the treatment arms; the indicator is
	* only needed for this regression
	generate submitted = !missing(proposal_id)
	regress submitted t1##MS t1##TR tothhs road i.r_ward, vce(robust)
	drop submitted

********************************************************************************
************* Figure 1: Experimental Design *************
********************************************************************************

* Sample size in CDD treatment vs control
tabulate t1

* Sample size in arms 1-6: within each CDD status (control first), tabulate
* the status quo arm, the selection-only arm and the training arm
forvalues cdd = 0/1 {
	tabulate SQ if SQ == 1 & t1 == `cdd'
	tabulate MS if MS == 1 & t1 == `cdd' & TR == 0
	tabulate TR if TR == 1 & t1 == `cdd'
}

********************************************************************************
************* Figure 2: Distribution of Government Proposal Scores *************
********************************************************************************

* CDF Score by MS
label define ms_lab 0 "Chiefly Default" 1 "Technocratic Selection" 
label values MS ms_lab

* Non-submitters <- treatment arm mean
* (score copies meanscore; missing values are filled with the mean of the
*  village's own MS group)
gen score = meanscore
egen mean_MS0= mean(score) if MS==0 
egen mean_MS1= mean(score) if MS==1 
replace score = mean_MS0 if MS==0 & meanscore==.
replace score = mean_MS1 if MS==1 & meanscore==.

* Cumulative score by arm
cumul score if MS==0, generate(ms0)
cumul score if MS==1, generate(ms1)

	///////////////
	// IN-TEXT ESTIMATE: Kolmogorov-Smirnov test rejects equivalence at p-value = 0.03
	//////////////
	ksmirnov score, by(MS) exact

* Plot Figure 2
sort score MS
la var ms0 "c.d.f. of Chiefly Default"
la var ms1 "c.d.f. of Technocratic Selection" 	
* The first line's width was written "vv. thick", which is not a valid
* linewidthstyle; "vvthick" is the valid name it appears to stand for
twoway (line ms0 score, sort lwidth(vvthick)) (line ms1 score, sort lwidth(thick)), xtitle("Government Project Proposal Score") scale(0.8) xline(67) graphregion(color(white)) yscale(range(0 1.13)) text(1.14 55 "Winning score threshold", orient(horizontal) si(medium) justification(center) )
graph export "fig2.png", replace
*Notes: This figure presents the cumulative density of the scores the relevant district governments gave to proposals submitted by communities, separately for those assigned to the chiefly default condition (treatment arms 1 and 4 in Figure 1) and to the technocratic selection treatment (arms 2, 3, 5 and 6).  The vertical line demarcates the minimum score threshold that determines which communities won an implementation grant (standardized by minus 1 point for Bombali District to place both districts on a uniform scale).  Scores imputed at experimental arm mean for the four non-submitting communities (N = 236).  A Kolmogorov-Smirnov test rejects equivalence of the two distributions at p-value = 0.03.

	
********************************************************************************
****************** Figure 3: Proposal Performance Across Arms ******************
********************************************************************************

* Horizontal position of each treatment arm in the plot
generate order = .
	replace order = 1 if SQ == 1 & t1==0
	replace order = 2 if SQ == 1 & t1==1
	replace order = 3 if MS == 1 & TR==0 & t1==0
	replace order = 4 if MS == 1 & TR==0 & t1==1
	replace order = 5 if MS == 1 & TR==1 & t1==0
	replace order = 6 if MS == 1 & TR==1 & t1==1

* Arm-level dataset for the plot: mean, SD and count of the index per arm,
* then the bounds of a 95 percent t-based confidence interval
preserve
	local outcome "zscore_HII_cddsq"
	local arm "order"
	collapse (mean) m`outcome' = `outcome' (sd) sd`outcome' = `outcome' (count) n = `outcome', by(`arm')
	generate hi`outcome' = m`outcome' + invttail(n-1, 0.025)*(sd`outcome' / sqrt(n))
	generate lo`outcome' = m`outcome' - invttail(n-1, 0.025)*(sd`outcome' / sqrt(n))

	* Capped interval per arm with the arm mean overlaid
	twoway rcap hi`outcome' lo`outcome' `arm', ///
		xlabel(1 `" "Chiefly Default" "no CDD (Arm 1)" "' ///
		2  `" "Chiefly Default" "with CDD (Arm 4)" "' ///
		3 `" "Technocrats" "no CDD (Arm 2)" "' ///
		4  `" "Technocrats" "with CDD (Arm 5)" "' ///
		5 `" "Trained Technocrats" "no CDD (Arm 3)" "' ///
		6  `" "Trained Technocrats" "with CDD (Arm 6)" "' , valuelabel labsize(vsmall)) ///
		ylabel(-0.5(0.25)1) yscale(range(0 1)) xscale(range(0.5 6.5))  vertical ///
		|| scatter  m`outcome' `arm' , ///
		ytitle("Proposal Score (Index)") xtitle("") legend(off)  xsize(6)  graphregion(color(white))
	graph export "fig3.png", replace
restore
	* Notes: This figure presents the mean proposal score index and 95 percent confidence interval for the different types of managers in each of the six experimental arms indicated in Figure 1. Scores are standardized with respect to chiefs in the default condition without CDD exposure (Arm 1) and expressed in standard deviation units.  The brackets compare two specific arms to each other and report the difference in mean scores and associated p-value from a t-test of equality of means across arms. The positive and marginally significant difference between Arm 2 and Arm 1 above captures the “pure” effect of technocratic selection in the absence of CDD.  The positive and highly significant difference between Arm 3 and Arm 1 captures the combined effect of selecting and training technocrats in the absence of CDD.  The three brackets below the point estimates capture the effect of CDD across comparable treatment arms in the technocratic selection experiment.  The first two null results suggests that neither chiefs nor technocrats perform any better in CDD versus control communities.  The rightmost bracket suggests that technocrats with CDD experience responded more strongly to the management training. Yet note that in the regression analogue (in Appendix Table A5), the F-test cannot reject that all three CDD estimates are jointly equal to zero, while the comparable F-test for the four technocratic selection and training arms rejects at above 99 percent confidence. Missing values for communities that did not submit a proposal are imputed at the relevant treatment arm mean.


* Statistical comparison between arms (added manually in figure)
mean zscore_HII_cddsq, over(order)

* Two-sample t-tests for the pairs of plot positions shown in the figure
foreach pair in "1 2" "1 3" "1 5" "5 6" "3 4" {
	tokenize `pair'
	ttest zscore_HII_cddsq if inlist(order, `1', `2'), by(order)
}



********************************************************************************
**************** Figure A2: Distribution of Scores by Treatment ****************
********************************************************************************

	///////////////
	// IN-TEXT ESTIMATE: The two sets of scores are highly positively correlated (correlation coefficient of 0.87) 
	//////////////	
	
	* Correlation between the government and expert scores
	correlate policyscore expscore


* Government score against expert score, one marker style per combination of
* CDD status (t1) and manager selection arm, with a 45-degree reference line
local yx policyscore expscore
twoway  (scatter `yx' if t1==1 & SQ==1, msymbol(circle_hollow)) ///
		(scatter `yx' if MS==1 & t1==1, msymbol(circle)) ///
		(scatter `yx' if t1==0 & SQ==1, msymbol(triangle_hollow)) ///
		(scatter `yx' if MS==1 & t1==0, msymbol(triangle)) ///
		(function y=x, range(0 100)),  ///
		yscale(range(0 100)) ylabel(0(20)100) ///
		xscale(range(0 100)) xlabel(0(20)100) xmtick(0(20)100) ///
		ytitle(Government Proposal Score) xtitle(Expert Proposal Score) ///
		graphregion(color(white)) ///
		legend(order(1 "CDD Treatment, Chiefly Default" 2 "CDD Treatment, Technocrats" 3 "No CDD, Chiefly Default" 4 "No CDD, Technocrats"))
		graph export "figA2.png", replace
		* Notes: This figure plots the distribution of proposal scores given by the district government officials to allocate the infrastructure grants (Y-axis) against the scores given by unaffiliated development practitioners using the same scoring guidelines (X-axis).  Higher scores indicate higher quality proposals.  Both sets of raters were blinded to the name of the submitting communities.  Each dot represents a proposal submitted by a particular community, where circles indicate CDD treatment status, triangles indicate CDD control status, shaded in shapes indicate assignment to technocratic selection and hollow shapes indicate assignment to the chiefly control default condition.
		

********************************************************************************
************************ Figure 4: Delegation Unpacked *************************
************ Figure A1: Delegation Unpacked Including Missing Values ***********
********************************************************************************

* Keep a copy of the full analysis data; it is reloaded for the tables
tempfile fulldata
save `fulldata', replace

* Treatment indicators & randomisation blocking vars, one row per village
keep MS id_vill r_ward t1
generate MS_t1 = MS*t1
label variable t1 "CDD"
label variable MS_t1 "Technocratic Selection * CDD"
tempfile arms
save `arms'

* Data from fuzzy merging, by names, of the managerial capital instrument
* and submission instruments; original data cannot be made public
* See ReadMe and namematch_sub_mccap.do for details
use "public_submission_matched.dta", clear
merge m:1 id_vill using `arms', gen(_mer)

* Find missing villages: among the type-4 rows and the type-5/position-1 rows,
* villages contributing exactly one row get a placeholder type-5/position-1
* row carrying only their treatment information
* (presumably these villages lack the type-5 record - verify against the data)
preserve
	* Type 4: Leader/Submitter, Type 5: Most say in choosing project
	keep if (type==5 & position==1) | type==4
	bysort id_vill: generate count=_N
	keep if count==1
	keep id_vill MS t1 MS_t1 r_ward
	generate missvill=1
	generate type=5
	generate position=1
	tempfile placeholder
	save `placeholder'
restore
append using `placeholder'

* Prep for plot: one row per village, treatment indicators shifted to 1/2
keep if (type==5 & position==1)
generate treat=MS+1
generate treat1=t1+1
label define ms 1 "Chiefly Default" 2 "Technocratic Selection" 
label define cdd 1 "Control" 2 "CDD" 
label values treat ms
label values treat1 cdd

* Categories; the assignments are applied in this order, so a later rule
* overrides an earlier one wherever it has no "cat==." guard
generate cat=.
	replace cat=1 if chief==1
	replace cat=2 if topscorer==1 & chief==0
	replace cat=3 if found==1 & cat==.
	replace cat=4 if notfound==1
	replace cat=5 if cat==.
label define cate 1 "Chief" 2 "Top scorer" 3 "Other test takers" 4 "People not found" 5 "Missing name" 
label values cat cate

**************
* Fig A1, Panel A: leader categories by selection arm, missing names included
**************
* splitvallabels returns r(relabel), consumed by relabel() below
splitvallabels treat
catplot cat treat, ///
	var1opts(label(labsize(small))) ///
	var2opts(label(labsize(small)) relabel(`r(relabel)')) ///
	percent(treat) asyvars stack ///
	bar(1, color(navy) fintensity(inten100)) ///
	bar(2, color(dkgreen) fintensity(inten60)) ///
	bar(3, color(green) fintensity(inten40)) ///
	bar(4, color(dimgrey) fintensity(inten10)) ///
	bar(5, color(white) fintensity(inten10) lcolor(grey)) ///
	legend(rows(1) stack size(small) ///
		order(1 "Chief" 2 "Top scorer" "not chief"  3 "Other test" "takers" 4 "People not" "in data" 5 "Missing" "data") ///
		symplacement(center) ///
		title(Types of Project Leaders, size(small))) ///
	ytitle("%", size(small)) ///
	graphregion(color(white))

graph export "figA1A.png", replace

* Column percentages behind the graph
tabulate cat treat, column

**************
* Fig A1, Panel B: leader categories by CDD status, missing names included
**************
* splitvallabels returns r(relabel), consumed by relabel() below
splitvallabels treat1
catplot cat treat1, ///
	var1opts(label(labsize(small))) ///
	var2opts(label(labsize(small)) relabel(`r(relabel)')) ///
	percent(treat1) asyvars stack ///
	bar(1, color(navy) fintensity(inten100)) ///
	bar(2, color(dkgreen) fintensity(inten60)) ///
	bar(3, color(green) fintensity(inten40)) ///
	bar(4, color(dimgrey) fintensity(inten10)) ///
	bar(5, color(white) fintensity(inten10) lcolor(grey)) ///
	legend(rows(1) stack size(small) ///
		order(1 "Chief" 2 "Top scorer" "not chief"  3 "Other test" "takers" 4 "People not" "in data" 5 "Missing" "data") ///
		symplacement(center) ///
		title(Types of Project Leaders, size(small))) ///
	ytitle("%", size(small)) ///
	graphregion(color(white))

graph export "figA1B.png", replace
	* Add Notes: Notes: This figure unpacks delegation by showing how the identity of who had the most say in choosing the project matches the community nominations and managerial capital testing data, and includes communities missing data (as a companion to main text Figure 4). Panel A shows that under technocratic selection (pooling communities across the CDD arms), communities were much more likely to select the top scorer on the managerial capital test to choose the project; while under the chiefly default, communities were much more likely to rely on the village headman. Panel B shows that the CDD experience made communities somewhat more likely to select the top scorer (pooling communities across the technocratic selection arms), but by much less than the public nudge to delegate.

**************
* Fig 4, Panel A: leader categories by selection arm, missing names excluded
**************
* From here on the rows with a missing name are gone from the data
drop if cat==5	// Missing data

* splitvallabels returns r(relabel), consumed by relabel() below
splitvallabels treat
catplot cat treat, ///
	var1opts(label(labsize(small))) ///
	var2opts(label(labsize(small)) relabel(`r(relabel)')) ///
	percent(treat) asyvars stack ///
	bar(1, color(navy) fintensity(inten100)) ///
	bar(2, color(dkgreen) fintensity(inten60)) ///
	bar(3, color(green) fintensity(inten40)) ///
	bar(4, color(dimgrey) fintensity(inten10)) ///
	legend(rows(1) stack size(small) ///
		order(1 "Chief" 2 "Top scorer" "not chief"  3 "Other test" "takers" 4 "People not" "in data") ///
		symplacement(center) ///
		title(Types of Project Leaders, size(small))) ///
	ytitle("%", size(small)) ///
	graphregion(color(white))

graph export "fig4A.png", replace

* Column percentages behind the graph
tabulate cat treat, column

**************
* Fig 4, Panel B: leader categories by CDD status
**************
* splitvallabels returns r(relabel), consumed by relabel() below
splitvallabels treat1
catplot cat treat1, ///
	var1opts(label(labsize(small))) ///
	var2opts(label(labsize(small)) relabel(`r(relabel)')) ///
	percent(treat1) asyvars stack ///
	bar(1, color(navy) fintensity(inten100)) ///
	bar(2, color(dkgreen) fintensity(inten60)) ///
	bar(3, color(green) fintensity(inten40)) ///
	bar(4, color(dimgrey) fintensity(inten10)) ///
	legend(rows(1) stack size(small) ///
		order(1 "Chief" 2 "Top scorer" "not chief"  3 "Other test" "takers" 4 "People not" "in data") ///
		symplacement(center) ///
		title(Types of Project Leaders, size(small))) ///
	ytitle("%", size(small)) ///
	graphregion(color(white))

graph export "fig4B.png", replace
	* Add Notes: Notes: This figure unpacks delegation by showing how the identity of who had the most say in choosing the project matches the community nominations and managerial capital testing data. Panel A shows that under technocratic selection (pooling communities across the CDD treatment arms), communities were much more likely to select the top scorer on the managerial capital test to choose the project; while under the chiefly default, communities were much more likely to rely on the village headman. Panel B shows that the CDD experience made communities somewhat more likely to select the top scorer (pooling communities across the technocratic selection treatment arms), but by much less than the public nudge to delegate. 


********************************************************************************
******************* Table 1: Treatment Effects on Performance ******************
********************************************************************************
* Reload the full analysis data saved before the Figure 4 / A1 data work
use `fulldata', clear

* PANEL A: Technocratic Selection versus CDD
* One regression per outcome; each stored model also carries the joint test
* of MS and MS_t1 and the outcome mean in the MS==0 & t1==0 group
eststo clear
foreach var in zscore_HII_cddsq techscore_cddsq expscore_cddsq policyscore_cddsq winner  {
	eststo: reg `var' MS t1 MS_t1 tothhs road i.r_ward, robust
	test MS MS_t1 
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)
	quietly sum `var' if MS==0 & t1==0
	estadd scalar controlmean = r(mean)
}

* esttab with no model list tabulates every stored model. The loop macro is
* not referenced here (it is empty once the loop ends), and mlabels names
* only the five estimated columns.
esttab using "table1.rtf", replace label cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(N ftest pftest controlmean, fmt( %9.0f %9.3f %9.3f %9.3f) labels ("Observations" "Fstat" "pvalue" "Control Mean")) style(fixed) ///
title ("Table 1. Panel A: Technocratic Selection versus CDD") starlevels(* .1 ** .05 *** .01) mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score" "Won a Grant") ///
keep(MS t1 MS_t1)
	
* PANEL B: Technocratic Selection and Managerial Training
* NOTE(review): the table note for Panel B describes the two balancing
* variables and ward-by-CDD strata; this specification has i.block only,
* with no tothhs/road terms - confirm which one is intended.
eststo clear
foreach var in zscore_HII_sq techscore_sq expscore_sq policyscore_sq winner  {
		eststo: reg `var' MS TR i.block, robust
		su `var' if SQ==1
		test MS TR
		estadd scalar ftest = r(F)
		estadd scalar pftest = r(p)
		quietly sum `var' if MS==0
		estadd scalar controlmean = r(mean)
	}
	
* Appended to the same file as Panel A
esttab using "table1.rtf", append label cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(N ftest pftest controlmean, fmt( %9.0f %9.3f %9.3f %9.3f) labels ("N" "Fstat" "pvalue" "Control Mean" )) style(fixed) ///
title ("Table 1. Panel B: Technocratic Selection and Managerial Training") starlevels(* .1 ** .05 *** .01) mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score" "Won a Grant") ///
keep(MS TR)  

	* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) robust standard errors; iii) specifications in Panel A pool the technocratic selection and training arms together (see Appendix Table A4 for full interaction model) and include strata for geographic ward and two balancing variables (distance to road and community size) from the original randomization; iv) specifications in Panel B include the two balancing variables and strata for ward crossed with CDD assignment;  v) outcomes in columns 2 to 4 are mean effects indices, expressed in standard deviation units, standardized with respect to the mean and standard deviation of control arm 1 (arms 1 and 4) in Figure 1 for Panel A (B) (see Kling, Liebman and Katz 2007); vi) missing scores for the 4 non-submitting communities are imputed at the respective treatment arm mean (see Appendix Tables A2 and A3 for imputation bounds); vii) outcome in column 1 is an equally weighted index of those in columns 2 to 4; viii) outcome in column 5 is a binary indicator;  ix) Training term in Panel B captures the additional effect of training beyond that of technocratic selection; x) the F-statistic and associated p-value evaluate the hypothesis that the listed terms are jointly equal to zero; and xi) the sample for all specifications includes all communities in Figure 1.	
	

********************************************************************************
************* IN TEXT ESTIMATES WITH MANAGEMENT CAPITAL TEST DATA **************
********************************************************************************	
use "public_mancap_results.dta", clear

	///////////////
	// IN-TEXT ESTIMATE: 
	* Comparing technocratic selection to the default of chiefly control, the highest scoring manager nominated by the community strongly outperforms the village headman, by 1.7 standard deviation units on average (standard error 0.14), on the written management test.
	//////////////
	
	* Build std1, std2, ... : one standardized score per s9* variable, in
	* dataset order. The counter is set explicitly so the numbering does not
	* depend on whether a local named i already exists.
	local i = 0
	foreach var of varlist s9* {
		local i=`i'+1
		qui sum `var'
		local problem_sd=r(sd) // For when there's no variation in controls 
		
		* Standardise relative to the MS==0 observations
		qui sum `var' if MS==0
		qui gen `var'_mean=r(mean)
		qui gen `var'_sd=r(sd)
		qui replace `var'_sd=`problem_sd' if `var'_sd==0 // Impute as above
		qui gen std`i'=(`var'-`var'_mean)/`var'_sd
		
		* Missing values: MS==1 rows take the MS==1 mean of the standardized
		* score, MS==0 rows take 0
		qui egen mean_std_t=mean(std`i') if MS==1 // use new treatment MS
		qui replace std`i'=mean_std_t if MS==1 & std`i'==.
		qui replace std`i'=0 if MS==0 & std`i'==.
		drop `var'_mean `var'_sd mean_std_t
	}
	
	* Difference in std9 between MS groups among managers
	ttest std9 if manager == 1, by(MS)

	///////////////
	// IN-TEXT ESTIMATE: Footnote 13	
	* Note that we estimate a null result on whether management training further enhanced the technocrats' scores (equal to -0.027 standard deviation units with standard error 0.133)
	//////////////

	* Managers in the MS==1 arms, compared by training status
	ttest std9 if manager==1 & MS == 1, by(TR)

	///////////////
	// IN-TEXT ESTIMATE: 
	* There is a positive correlation between the score of the selected project manager (whether chief or top scorer) and outcomes in the competition: a one standard deviation increase in test score improves measured proposal quality by 0.27 standard deviation units (standard error 0.05) and increases the probability of winning a grant by 5 percentage points (standard error 1.7).
	//////////////

	* Each competition outcome on the manager's std9 score
	foreach outcome in zscore_HII_sq_deidentified winner {
		regress `outcome' std9 if manager == 1
	}

	///////////////
	// IN-TEXT ESTIMATE: 
	* We can break out these correlations for each of the eight core competencies covered by the test. Of these, local infrastructure experience, literacy and numeracy have the most predictive power for proposal quality.
	//////////////

	* Proposal quality on each of std1 to std8 in turn
	forvalues k = 1/8 {
		regress zscore_HII_sq_deidentified std`k' if manager == 1
	}
	* std6 is literacy, std7 is numeracy
	
	///////////////
	// IN-TEXT ESTIMATE: 	
	* 50% of the nominees had a test score of at least 60 points, which is twice the average score of chiefs.
	//////////////	
	
	* Share of non-chiefs scoring at least 60. Missing scores are excluded
	* explicitly, because a missing value satisfies ">= 60" in Stata. The
	* share is computed from the counts (previously typed in as 459/1173).
	count if s9_q9 >= 60 & !missing(s9_q9) & chief == 0
	local n_high = r(N)
	count if chief == 0
	local n_nonchief = r(N)
	di `n_high'/`n_nonchief'
	
	///////////////
	// IN-TEXT ESTIMATE:
		* ... these 4 winners scored 22 points higher on the managerial capital test than the mean for all chiefs, an increase of 71 percent, indicative of strong positive selection
	//////////////		
	
	* Ratio of the two mean scores, computed from the summaries
	* (previously typed in as 53/31.1)
	sum s9_q9 if winner == 1 & manager == 1 & chief == 1
	local mean_first = r(mean)
	sum s9_q9 if winner == 0 & chief == 1
	local mean_second = r(mean)
	di `mean_first'/`mean_second'
	
	///////////////
	// IN-TEXT ESTIMATE:	
	*The test runs to 121 points and generated wide dispersion in scores: the range across all test-takers was 1 to 108, with a mean of 42 and standard deviation of 26. 
	//////////////
	
	sum s9_q9
	
********************************************************************************
************** Table 2: Variation in Characteristics of Nominees ***************
********************************************************************************

* Data from the management skills survey, see instrument: "Managerical Capital Test_actual"
use "mc_cap.dta", clear

* Panel A: Chiefs vs Top-scoring Technocrats (in all communities)
* chief1 = 1 for chiefs, 0 for highest scorers who are not chiefs, missing otherwise
gen chief1 = .
	replace chief1 = 1 if chief==1
	replace chief1 = 0 if highest_scorer==1 & chief1!=1
	
* Build the balance table row by row in a postfile, then export it.
* The unused string scalar formerly named chief1 is gone: it played no part
* in the results and shared its name with the variable chief1.
preserve
	tempname memhold
	tempfile stats_balance
	postfile `memhold' str60 Name Obs0 Mean0 SD0 Obs1 Mean1 SD1 Pvalue using "`stats_balance'", replace
	
	foreach var in age male anyeduc borninv farmer teacher business {
		
		* Technocrats
		su `var' if chief1==0
		scalar mu0 =`r(mean)'
		scalar sd0 = `r(sd)'
		scalar obs0 = `r(N)'
		
		* Chiefs
		su `var' if chief==1
		scalar mu1 = `r(mean)'
		scalar sd1 = `r(sd)'
		scalar obs1 = `r(N)'
		scalar varr = "`var'"
		
		* Compare: two-sided p-value on the chief1 coefficient, with
		* standard errors clustered by village
		reg `var' chief1, vce(cluster id_vill)
		scalar pvalue = ((2 * ttail(e(df_r), abs(_b[chief1]/_se[chief1]))))
		post `memhold' (varr) (obs0) (mu0) (sd0) (obs1) (mu1) (sd1) (pvalue)
		scalar drop _all
	}
	
	* Same row for the test score, taken from the test results data
	* (this replaces the data in memory; the postfile stays open)
	use "public_mancap_results.dta", clear
		* Technocrats
		su s9_q9 if chief1==0
		scalar mu0 =`r(mean)'
		scalar sd0 = `r(sd)'
		scalar obs0 = `r(N)'
		
		* Chiefs
		su s9_q9 if chief==1
		scalar mu1 = `r(mean)'
		scalar sd1 = `r(sd)'
		scalar obs1 = `r(N)'
		scalar varr = "s9_q9"
		
		* Compare 
		reg s9_q9 chief1, vce(cluster new_id_vill)
		scalar pvalue = ((2 * ttail(e(df_r), abs(_b[chief1]/_se[chief1]))))
		post `memhold' (varr) (obs0) (mu0) (sd0) (obs1) (mu1) (sd1) (pvalue)
		scalar drop _all
	
	postclose `memhold'
	use "`stats_balance'", clear
	
	* Keep the reported columns and write Panel A
	gen Obs = Obs0 + Obs1
	order Name Mean1 Mean0 Pvalue Obs
	keep Name Mean1 Mean0 Pvalue Obs
	export excel using "table2A.xlsx", replace firstrow(variables)
restore

* Panel B: Technocratic Nominees in CDD Treatment versus Control Communities
* nominee = 1 for non-chiefs, missing otherwise
gen nominee = 1 if chief==0

preserve
	keep if nominee == 1
	* One row per characteristic is posted to a temporary results file
	tempname memhold
	tempfile stats_balance
	postfile `memhold' str60 Name Obs0 Mean0 SD0 Obs1 Mean1 SD1 Pvalue using "`stats_balance'", replace
	foreach var in age male anyeduc borninv farmer teacher business {
		
		* CDD Controls
		su `var' if t1==0
		scalar mu0 =`r(mean)'
		scalar sd0 = `r(sd)'
		scalar obs0 = `r(N)'
		
		* CDD Treated
		su `var' if t1==1
		scalar mu1 = `r(mean)'
		scalar sd1 = `r(sd)'
		scalar obs1 = `r(N)'
		scalar varr = "`var'"
		
		* Compare: two-sided p-value on the t1 coefficient, with standard
		* errors clustered by id_vill
		reg `var' t1, vce(cluster id_vill)
		scalar pvalue = ((2 * ttail(e(df_r), abs(_b[t1]/_se[t1]))))
		post `memhold' (varr) (obs0) (mu0) (sd0) (obs1) (mu1) (sd1) (pvalue)
		* Clear all scalars so no value carries over to the next row
		scalar drop _all
	}
	
	* The test score (s9_q9) row is computed from the test results file,
	* restricted to non-chiefs
	use "public_mancap_results.dta", clear
	keep if chief == 0
		* CDD Controls
		su s9_q9 if t1==0
		scalar mu0 =`r(mean)'
		scalar sd0 = `r(sd)'
		scalar obs0 = `r(N)'
		
		* CDD Treated
		su s9_q9 if t1==1
		scalar mu1 = `r(mean)'
		scalar sd1 = `r(sd)'
		scalar obs1 = `r(N)'
		scalar varr = "s9_q9"
		
		* Compare (cluster variable in this file is new_id_vill)
		reg s9_q9 t1, vce(cluster new_id_vill)
		scalar pvalue = ((2 * ttail(e(df_r), abs(_b[t1]/_se[t1]))))
		post `memhold' (varr) (obs0) (mu0) (sd0) (obs1) (mu1) (sd1) (pvalue)
		scalar drop _all

	* Load the posted rows and export means, p-value and total observations
	postclose `memhold'
	use "`stats_balance'", clear
	
	gen Obs = Obs0 + Obs1
	order Name Mean0 Mean1 Pvalue Obs
	keep Name Mean0 Mean1 Pvalue Obs
	export excel using "table2B.xlsx", replace firstrow(variables)
restore


********************************************************************************
********************** Table 3: Variation in Chief's Role **********************
********************************************************************************
use `fulldata', clear

**************
* Preparation
**************
* See survey instruments for details of below
* Every indicator below starts at 0 when its string item is non-empty and is
* switched to 1 as soon as any of the listed dummy variables equals 1.

* Who chose the proposal? (was the chief mentioned at all?)
* Dummies are sub_11_c_<a>_<b> for a = 1..6 and b = 1..3
generate chiefsay = 0 if sub_11_c_1 != ""
forvalues b = 1/3 {
	forvalues a = 1/6 {
		replace chiefsay = 1 if sub_11_c_`a'_`b' == 1
	}
}

* Who wrote the project description? (chiefwrote, item 13)
* Who did the budget?                (chiefbudg,  item 14)
* Who set the timeline?              (chieftime,  item 15)
* Who had the idea?                  (chiefidea,  item 17)
* Dummies are sub_<item>_c_<a> for a = 1..6
foreach pair in "chiefwrote 13" "chiefbudg 14" "chieftime 15" "chiefidea 17" {
	local newvar : word 1 of `pair'
	local item   : word 2 of `pair'
	generate `newvar' = 0 if sub_`item'_c != ""
	forvalues a = 1/6 {
		replace `newvar' = 1 if sub_`item'_c_`a' == 1
	}
}

* Who developed the proposal?
* Dummies are sub_12_c_<a>_1 for a = 1..6
generate chiefdev = 0 if sub_12_c_1 != ""
forvalues a = 1/6 {
	replace chiefdev = 1 if sub_12_c_`a'_1 == 1
}

**************
* Table 3
**************
* For each panel and outcome, run a two-group t-test and store the results as
* constant variables: _1 and _2 hold the two group means, _3 the p-value and
* _4 the combined number of observations.

* Panel A: Technocratic Selection Effect
foreach outcome in chiefsay chiefwrote chiefbudg chieftime {
	ttest `outcome', by(MS)
	generate ms_`outcome'_1 = r(mu_1)
	generate ms_`outcome'_2 = r(mu_2)
	generate ms_`outcome'_3 = r(p)
	generate ms_`outcome'_4 = r(N_1) + r(N_2)
}

summarize ms_chief*_*
		
* Panel B: CDD Effect in Full Sample
foreach outcome in chiefsay chiefwrote chiefbudg chieftime {
	ttest `outcome', by(t1)
	generate t1_`outcome'_1 = r(mu_1)
	generate t1_`outcome'_2 = r(mu_2)
	generate t1_`outcome'_3 = r(p)
	generate t1_`outcome'_4 = r(N_1) + r(N_2)
}

* Panel C: CDD Effect in Technocratic Selection Arms
foreach outcome in chiefsay chiefwrote chiefbudg chieftime {
	ttest `outcome' if MS==1, by(t1)
	generate mst1_`outcome'_1 = r(mu_1)
	generate mst1_`outcome'_2 = r(mu_2)
	generate mst1_`outcome'_3 = r(p)
	generate mst1_`outcome'_4 = r(N_1) + r(N_2)
}

* Export Panels A (prefix ms), B (prefix t1) and C (prefix mst1) to table3.rtf.
* Panel A creates the file; Panels B and C are appended to it.
foreach panel in ms t1 mst1 {
	preserve
		* The stored statistics are constant across rows, so keep one row only
		keep `panel'_chief*_*
		keep if _n==1
		generate i = _n
		
		* Stack the statistics _1 .. _4 of each outcome into rows indexed by col
		reshape long `panel'_chiefsay_ `panel'_chiefwrote_ `panel'_chiefbudg_ `panel'_chieftime_ , i(i) j(col)
		
		label variable `panel'_chiefsay_ "Proportion where chiefly authorities chose the project"
		label variable `panel'_chiefwrote_  "Proportion where chiefly authorities wrote the description"
		label variable `panel'_chiefbudg_ "Proportion where chiefly authorities did the budget" 
		label variable `panel'_chieftime_ "Proportion where chiefly authorities set the timeline"
		
		eststo clear
		estpost tabstat `panel'_chiefsay_ `panel'_chiefwrote_ `panel'_chiefbudg_ `panel'_chieftime_, by(col) ///
			statistics(mean) columns(statistics) listwise not
		
		* Only the write mode and the column headings differ across panels
		if "`panel'" == "ms" {
			esttab using "table3.rtf", replace main(Mean) ///
				nostar unstack noobs nonote nonumber nomtitles nodep label ///
				eqlabels("Chiefly Default (arms 1,4)" "Technocratic Selection (arms 2,3,5,6)" "p-value on difference" "Observations")
		}
		else if "`panel'" == "t1" {
			esttab using "table3.rtf", append main(Mean) ///
				nostar unstack noobs nonote nonumber nomtitles nodep label ///
				eqlabels("CDD Controls (arms 1-3)" "CDD Treatment (arms 4-6)" "p-value on difference" "Observations")
		}
		else {
			esttab using "table3.rtf", append main(Mean) ///
				nostar unstack noobs nonote nonumber nomtitles nodep label ///
				eqlabels("CDD Controls (arms 2,3)" "CDD Treatment (arms 5,6)" "p-value on difference" "Observations")
		}
	restore
}
* Remove the helper variables from the analysis data
drop ms_chief*_* t1_chief*_* mst1_chief*_*
	* Add Notes: i) outcomes capture the proportion of management decisions that were made by the village headman or other chiefly authorities in the community; ii) Panel A compares communities assigned to technocratic selection (with or without training) to the default of chiefly control; iii) Panel B compares communities assigned to CDD treatment versus control; iv) Panel C compares CDD treated versus control communities in the technocratic selection (with or without training) arms, to look at compliance with the assignment to delegate to technocrats; and v) observation counts vary with missing values or "don't know" responses in the submission survey.

	
********************************************************************************
************************** Table A1: Baseline Balance **************************
********************************************************************************	
	
* Load baseline data from QJE 2012, get core variables with info at baseline
* The list of baseline infrastructure items is reused below to build the index
local p_me_corevars_2 "f_psch f_phu f_well f_dryflr f_grnstr f_comcntr f_barrie f_palava f_market proposal"
tempfile bdata
preserve
	use "gbf_analysis.dta", clear
	keep if post==0 // Baseline
	keep id_vill tothhs road yrsed vdc assets `p_me_corevars_2' petty trust_chf trust_lc
	save `bdata'
restore

* Index based on QJE 2012 paper
* (this preserve stays open through Table A2; the matching restore follows it)
preserve
	keep id_vill MS TR SQ t1 r_ward	
	merge 1:1 id_vill using `bdata' 
	local i = 0
	foreach item of varlist `p_me_corevars_2' {
		local ++i
		* Full-sample SD, used when the MS == 0 arm shows no variation
		quietly summarize `item'
		local problem_sd = r(sd)
		
		* Standardise to MS == 0 arm
		quietly summarize `item' if MS==0
		quietly generate `item'_mean = r(mean)
		quietly generate `item'_sd = r(sd)
		quietly replace `item'_sd = `problem_sd' if `item'_sd==0
		quietly generate std`i' = (`item'-`item'_mean)/`item'_sd
		
		* Fill missing standardized values: MS == 1 mean for MS == 1, 0 for MS == 0
		quietly egen mean_std_t = mean(std`i') if MS==1
		quietly replace std`i' = mean_std_t if MS==1 & std`i'==.
		quietly replace std`i' = 0 if MS==0 & std`i'==.
		drop `item'_mean `item'_sd mean_std_t
	}
	* Infrastructure index: row mean of the standardized items
	egen zscore_inf = rowmean(std*)

	* Treatment vars	
	generate MS_t1 = MS*t1
	label variable t1 "CDD"
	label variable MS_t1 "Technocratic Selection * CDD"
	
	* Table A1: Balance at Baseline
	* One regression per baseline outcome, with a joint test of the three
	* treatment terms and the MS == 0 mean and SD stored alongside
	local vars tothhs road zscore_inf vdc assets petty yrsed trust_chf trust_lc
	eststo clear
	foreach depvar of local vars {
		eststo: regress `depvar' MS t1 MS_t1 i.r_ward, vce(robust)
		test MS t1 MS_t1 
		estadd scalar ftest = r(F)
		estadd scalar pftest = r(p)
		quietly summarize `depvar' if MS==0
		estadd scalar depmean = r(mean)
		estadd scalar depsd = r(sd)
	}
	esttab using "tableA1.rtf", replace label ///
		cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
		stats(N ftest pftest depmean, fmt( %9.0f %9.3f) labels ("Observations" "Fstat" "pvalue" "Control Mean")) ///
		style(fixed) ///
		title ("Baseline (2005) Balance by Technocratic Selection and CDD") ///
		starlevels(* .1 ** .05 *** .01) ///
		mlabels("Num. of Households" "Distance to Road" "Infrastructure Score (index)" "VDC" "Assets" "Any Petty Traders" "Years of Education" "Trust in Chiefdom Officials" "Trust in Local Council Officials") ///
		keep(MS t1 MS_t1)
		* Add Notes: Notes i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01. ii) specifications include strata for geographic ward; iii) robust standard errors; iv) F-stat and associated p-value correspond to jointly testing that the three coefficients in each specification are equal to 0; v) all outcomes measured at baseline in 2005; and vi) the outcome in column 1 is the total number of households in the community, in column 2 it is the distance in miles to the nearest motorable road, in column 3 it is a community infrastructure index expressed in standard deivation units that measures whether the community had a functional primary school, health unit, water well, drying floor, grain store, community center, palava hut, court barrie, or market and whether it had submitted any infrastructure project proposal to an external funding agency, in column 4 it is an indicator of whether the community had a village development committee (VDC), in column 5 it is an average measure of household assets and amenities, in column 6 it is an indicator for the presence of any petty traders, in column 7 it is average years of education for household survey respondents, and in column 8 (9) it is an average measure of whether household survey respondents agree with the statement that chiefdom officials (local councillors) can be trusted.
		
		
********************************************************************************
***************** Table A2: Baseline Balance Interacted Model ******************
********************************************************************************	
	
	* Treatment indicators: one dummy per combination of CDD status (t1) and
	* arm (SQ, MS without TR, MS with TR). Labels are printed in tableA2.rtf.
	gen nocdd_sq=(t1==0 & SQ==1)
	label var nocdd_sq "No CDD + Default"
	gen cdd_sq=(t1==1 & SQ==1)
	label var cdd_sq "CDD + Default"
	gen nocdd_MS=(t1==0 & MS==1 & TR==0)
	label var nocdd_MS "No CDD + Technocratic Selection"
	gen cdd_MS=(t1==1 & MS==1 & TR==0)
	label var cdd_MS "CDD + Technocratic Selection"
	gen nocdd_MSTR=(t1==0 & MS==1 & TR==1)
	label var nocdd_MSTR "No CDD + Technocratic Selection + Training"
	gen cdd_MSTR=(t1==1 & MS==1 & TR==1)
	label var cdd_MSTR "CDD + Technocratic Selection + Training"
	
	* Table A2: Baseline Interacted Model
	* nocdd_sq is the omitted category; the F-test is joint on the other five
	local vars tothhs road zscore_inf vdc assets petty yrsed trust_chf trust_lc
	eststo clear
	foreach var in `vars'  {
		eststo: reg `var' nocdd_MS nocdd_MSTR cdd_sq  cdd_MS cdd_MSTR i.r_ward, robust
		test cdd_sq nocdd_MS cdd_MS nocdd_MSTR cdd_MSTR 
		estadd scalar ftest = r(F)
		estadd scalar pftest = r(p)
		quiet sum `var' if MS==0
		estadd scalar depmean = r(mean)
		estadd scalar depsd = r(sd)
	}
	
	* All stored estimates are exported (the loop macro is empty after the
	* loop, so it is no longer passed to esttab)
	esttab using "tableA2.rtf", replace label cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(N ftest pftest depmean, fmt( %9.0f %9.3f) labels ("Observations" "Fstat" "pvalue" "Control Mean")) style(fixed) ///
	title ("Baseline (2005) Balance Using Fully Interacted Model") starlevels(* .1 ** .05 *** .01) mlabels("Num. of Households" "Distance to Road" "Infrastructure Score (index)" "VDC" "Assets" "Any Petty Traders" "Years of Education" "Trust in Chiefdom Officials" "Trust in Local Council Officials") ///
	keep(nocdd_MS nocdd_MSTR cdd_sq  cdd_MS cdd_MSTR) 		
restore 
	* Add Notes: Notes i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01. ii) specifications include strata for geographic ward; iii) robust standard errors; iv) F-stat and associated p-value correspond to jointly testing that the five coefficients in each specification are equal to 0; v) all outcomes measured at baseline in 2005; and vi) the outcome in column 1 is the total number of households in the community, in column 2 it is the distance in miles to the nearest motorable road, in column 3 it is a community infrastructure index expressed in standard deivation units that measures whether the community had a functional primary school, health unit, water well, drying floor, grain store, community center, palava hut, court barrie, or market and whether it had submitted any infrastructure project proposal to an external funding agency, in column 4 it is an indicator of whether the community had a village development committee (VDC), in column 5 it is an average measure of household assets and amenities, in column 6 it is an indicator for the presence of any petty traders, in column 7 it is average years of education for household survey respondents, and in column 8 (9) it is an average measure of whether household survey respondents agree with the statement that chiefdom officials (local councillors) can be trusted.

	
********************************************************************************
******************** Table A3: Lower Imputation Bound, TEs *********************
********************************************************************************		

**************
* Preparation
**************
* Lower bound: missing proposal scores are set to the lowest observed value of
* each quality score before standardizing.
foreach raw of varlist techscore meanscore expscore {
	quietly summarize `raw'
	quietly generate `raw'_l = `raw'
	replace `raw'_l = r(min) if `raw'==.
}

* Standardize relative to chief (SQ==1) and CDD control (t1==0) (arm 1)
local l = 0
foreach score of varlist techscore_l meanscore_l expscore_l { 
	local ++l
	* Control mean and SD
	quietly summarize `score' if SQ==1 & t1==0
	quietly generate `score'_mean = r(mean)
	quietly generate `score'_sd = r(sd)
	* Standardize relative to control mean and SD
	quietly generate std_`l' = (`score'-`score'_mean)/`score'_sd

	* A standardized value that is still missing is replaced by the mean of
	* its own SQ x CDD cell (cells given as "SQ t1") ...
	foreach cell in "1 1" "0 1" "0 0" {
		local s : word 1 of `cell'
		local c : word 2 of `cell'
		egen mean_std_`l'_sq`s'cdd`c' = mean(std_`l') if SQ==`s' & t1==`c'
		replace std_`l' = mean_std_`l'_sq`s'cdd`c' if SQ==`s' & t1==`c' & std_`l'==.
	}
	* ... and by 0 in the reference cell, whose mean is 0 by construction
	replace std_`l' = 0 if SQ==1 & t1==0 & std_`l'==.
	drop `score'_mean `score'_sd mean_std*
}

rename std_1 techscore_cddsqlow
rename std_2 policyscore_cddsqlow
rename std_3 expscore_cddsqlow

* Outcome: proposal score (index), a means index across all proposal quality scores 
* (techscore meanscore expscore)
egen zscore_HII_cddsqlow = rowmean(techscore_cddsqlow policyscore_cddsqlow expscore_cddsqlow) 

* Standardize relative to chief (SQ==1), ignoring CDD
local l = 0
foreach score of varlist techscore_l meanscore_l expscore_l { 
	local ++l
	* Control mean and SD
	quietly summarize `score' if SQ==1
	quietly generate `score'_mean = r(mean)
	quietly generate `score'_sd = r(sd)
	* Standardize relative to control mean and SD
	quietly generate std_`l' = (`score'-`score'_mean)/`score'_sd
	* Remaining missing values: SQ==0 mean for SQ==0 rows, 0 for SQ==1 rows
	egen mean_std_`l'_t = mean(std_`l') if SQ==0
	replace std_`l' = mean_std_`l'_t if SQ==0 & std_`l'==.
	replace std_`l' = 0 if SQ==1 & std_`l'==.
	drop `score'_mean `score'_sd mean_std_`l'_t
}
				
* Outcome: proposal score (index), a means index across all proposal quality scores (techscore meanscore expscore)
egen zscore_HII_sqlow = rowmean(std_1 std_2 std_3) 
label variable zscore_HII_sqlow "Means Index"

rename std_1 techscore_sqlow
rename std_2 policyscore_sqlow
rename std_3 expscore_sqlow

* Labels
label variable techscore_cddsqlow "Technical Score"
label variable policyscore_cddsqlow "Gov't Score" 
label variable expscore_cddsqlow "Expert Score"
label variable techscore_sqlow "Technical Score"
label variable policyscore_sqlow "Gov't Score" 
label variable expscore_sqlow "Expert Score"
label variable zscore_HII_cddsqlow "Proposal Score (index) Lowerbound"

**************
* Table A3
**************	
* Panel A: Technocratic Selection versus CDD Institutional Reform (lower bound)
* Joint test is on MS and MS_t1; control mean is taken over MS==0 & t1==0
eststo clear
foreach outcome in zscore_HII_cddsqlow techscore_cddsqlow expscore_cddsqlow policyscore_cddsqlow {
	eststo: regress `outcome' MS t1 MS_t1 tothhs road i.r_ward, vce(robust)
	test MS MS_t1 
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)
	quietly summarize `outcome' if MS==0 & t1==0
	estadd scalar controlmean = r(mean)
}

esttab using "tableA3.rtf", replace label ///
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
	stats(N ftest pftest controlmean, fmt( %9.0f %9.3f) labels ("Observations" "Fstat" "pvalue" "Control Mean" )) ///
	style(fixed) ///
	title ("Table A3. Panel A: Technocratic Selection versus CDD Institutional Reform (lower bound)") ///
	starlevels(* .1 ** .05 *** .01) ///
	mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score") ///
	keep(MS t1 MS_t1)

* Panel B: Technocratic Selection and Managerial Training (lower bound)
* Joint test is on MS and TR; control mean is taken over MS==0
eststo clear
foreach outcome in zscore_HII_sqlow techscore_sqlow expscore_sqlow policyscore_sqlow {
	eststo: regress `outcome' MS TR i.block, vce(robust)
	summarize `outcome' if SQ==1
	test MS TR
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)
	quietly summarize `outcome' if MS==0
	estadd scalar controlmean = r(mean)
}

esttab using "tableA3.rtf", append label ///
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
	stats(N ftest pftest controlmean, fmt( %9.0f %9.3f) labels ("N" "Fstat" "pvalue" "Control Mean" )) ///
	style(fixed) ///
	title ("Table A3. Panel B: Technocratic Selection and Managerial Training (lower bound)") ///
	starlevels(* .1 ** .05 *** .01) ///
	mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score" ) ///
	keep(MS TR) 
	* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) robust standard errors; iii) specifications in Panel A pool the technocratic selection and training arms together (see Appendix Table A5 for full interaction model) and include strata for geographic ward and two balancing variables (distance to road and community size) from the original randomization; iv) specifications in Panel B include the two balancing variables and strata for ward crossed with CDD assignment;  v) outcomes in columns 2 to 4 are mean effects indices, expressed in standard deviation units, standardized with respect to the mean and standard deviation of control Arm 1 (Arms 1 and 4) in Figure 1 for Panel A (B) (see Kling, Liebman and Katz 2007); vi) missing scores for the 4 non-submitting communities are imputed at the lowest observed score in the data (at the highest observed score for the upper bound in Table A4); vii) outcome in column 1 is an equally weighted index of those in columns 2 to 4;  viii) the Training term in Panel B captures the additional effect of training above and beyond the effect of technocratic selection; and ix) the sample for all specifications includes all communities in Figure 1.			
	

********************************************************************************
******************** Table A4: Upper Imputation Bound, TEs *********************
********************************************************************************	

**************
* Preparation
**************
* Upper bound: missing proposal scores are set to the highest observed value of
* each quality score before standardizing.
foreach raw of varlist techscore meanscore expscore {
	quietly summarize `raw'
	quietly generate `raw'_m = `raw'
	replace `raw'_m = r(max) if `raw'==.
}

* Standardize relative to chief (SQ==1) and CDD control (t1==0) (arm 1)
local l = 0
foreach score of varlist techscore_m meanscore_m expscore_m { 
	local ++l
	* Control mean and SD
	quietly summarize `score' if SQ==1 & t1==0
	quietly generate `score'_mean = r(mean)
	quietly generate `score'_sd = r(sd)
	* Standardize relative to control mean and SD
	quietly generate std_`l' = (`score'-`score'_mean)/`score'_sd

	* A standardized value that is still missing is replaced by the mean of
	* its own SQ x CDD cell (cells given as "SQ t1") ...
	foreach cell in "1 1" "0 1" "0 0" {
		local s : word 1 of `cell'
		local c : word 2 of `cell'
		egen mean_std_`l'_sq`s'cdd`c' = mean(std_`l') if SQ==`s' & t1==`c'
		replace std_`l' = mean_std_`l'_sq`s'cdd`c' if SQ==`s' & t1==`c' & std_`l'==.
	}
	* ... and by 0 in the reference cell, whose mean is 0 by construction
	replace std_`l' = 0 if SQ==1 & t1==0 & std_`l'==.
	drop `score'_mean `score'_sd mean_std*
}

rename std_1 techscore_cddsqhigh
rename std_2 policyscore_cddsqhigh
rename std_3 expscore_cddsqhigh

* Outcome: proposal score (index), a means index across all proposal quality scores 
* (techscore meanscore expscore)
egen zscore_HII_cddsqhigh = rowmean(techscore_cddsqhigh policyscore_cddsqhigh expscore_cddsqhigh) 

* Standardize relative to chief (SQ==1), ignoring CDD
local l = 0
foreach score of varlist techscore_m meanscore_m expscore_m { 
	local ++l
	* Control mean and SD
	quietly summarize `score' if SQ==1
	quietly generate `score'_mean = r(mean)
	quietly generate `score'_sd = r(sd)
	* Standardize relative to control mean and SD
	quietly generate std_`l' = (`score'-`score'_mean)/`score'_sd
	* Remaining missing values: SQ==0 mean for SQ==0 rows, 0 for SQ==1 rows
	egen mean_std_`l'_t = mean(std_`l') if SQ==0
	replace std_`l' = mean_std_`l'_t if SQ==0 & std_`l'==.
	replace std_`l' = 0 if SQ==1 & std_`l'==.
	drop `score'_mean `score'_sd mean_std_`l'_t
}
		
* Outcome: proposal score (index), a means index across all proposal quality scores 
* (techscore meanscore expscore)
egen zscore_HII_sqhigh = rowmean(std_1 std_2 std_3) 
label variable zscore_HII_sqhigh "Means Index"

rename std_1 techscore_sqhigh
rename std_2 policyscore_sqhigh
rename std_3 expscore_sqhigh

* Labels
label variable techscore_cddsqhigh "Technical Score"
label variable policyscore_cddsqhigh "Gov't Score" 
label variable expscore_cddsqhigh "Expert Score"
label variable techscore_sqhigh "Technical Score"
label variable policyscore_sqhigh "Gov't Score" 
label variable expscore_sqhigh "Expert Score"
label variable zscore_HII_cddsqhigh "Proposal Score (index)"

**************
* Table A4
**************	
* Panel A: Technocratic Selection versus CDD Institutional Reform (upper bound)
* Joint test is on MS and MS_t1; control mean is taken over MS==0 & t1==0
eststo clear
foreach outcome in zscore_HII_cddsqhigh techscore_cddsqhigh expscore_cddsqhigh policyscore_cddsqhigh {
	eststo: regress `outcome' MS t1 MS_t1 tothhs road i.r_ward, vce(robust)
	test MS MS_t1 
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)
	quietly summarize `outcome' if MS==0 & t1==0
	estadd scalar controlmean = r(mean)
}

esttab using "tableA4.rtf", replace label ///
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
	stats(N ftest pftest controlmean , fmt( %9.0f %9.3f) labels ("Observations" "Fstat" "pvalue" "Control Mean" )) ///
	style(fixed) ///
	title ("Table A4. Panel A: Technocratic Selection versus CDD Institutional Reform (upper bound)") ///
	starlevels(* .1 ** .05 *** .01) ///
	mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score") ///
	keep(MS t1 MS_t1)

* Panel B: Technocratic Selection and Managerial Training (upper bound)
* Joint test is on MS and TR; control mean is taken over MS==0
eststo clear
foreach outcome in zscore_HII_sqhigh techscore_sqhigh expscore_sqhigh policyscore_sqhigh {
	eststo: regress `outcome' MS TR i.block, vce(robust)
	summarize `outcome' if SQ==1
	test MS TR 
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)
	quietly summarize `outcome' if MS==0
	estadd scalar controlmean = r(mean)
}

esttab using "tableA4.rtf", append label ///
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
	stats(N ftest pftest controlmean, fmt( %9.0f %9.3f) labels ("N" "Fstat" "pvalue" "Control Mean" )) ///
	style(fixed) ///
	title ("Table A4. Panel B: Technocratic Selection and Managerial Training (upper bound)") ///
	starlevels(* .1 ** .05 *** .01) ///
	mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score" ) ///
	keep(MS TR)
	* See notes above (Table A3). 

	
********************************************************************************
*********************** Table A5: Full Interaction Model ***********************
********************************************************************************	
* Interactions 
* capture: the interaction terms may already exist in the data in memory
capture generate TR_t1 = TR*t1
capture generate MS_t1 = MS*t1
label variable TR_t1 "Training * CDD"
label variable MS_t1 "Technocratic Selection * CDD"
label variable t1 "CDD"
label variable TR "Training"
label variable MS "Technocratic Selection"
			
* Regressions: three joint tests are stored with each model
*   1: MS, TR and their CDD interactions
*   2: CDD and its interactions
*   3: all five treatment terms
eststo clear
foreach outcome in zscore_HII_cddsq techscore_cddsq expscore_cddsq policyscore_cddsq winner {
	eststo: xi: regress `outcome' MS TR t1 MS_t1 TR_t1 tothhs road i.r_ward, vce(robust)
	test MS TR MS_t1 TR_t1 
	estadd scalar ftest1 = r(F)
	estadd scalar pftest1 = r(p)
	test t1 MS_t1 TR_t1
	estadd scalar ftest2 = r(F)
	estadd scalar pftest2 = r(p)
	test MS TR t1 MS_t1 TR_t1 
	estadd scalar ftest3 = r(F)
	estadd scalar pftest3 = r(p)
	quietly summarize `outcome' if MS==0 & t1==0
	estadd scalar controlmean = r(mean)
}

* Table 
esttab using "tableA5.rtf", replace label ///
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
	stats(ftest1 pftest1 ftest2 pftest2 ftest3 pftest3 N controlmean, fmt( %9.3f %9.3f %9.3f %9.3f %9.3f %9.3f %9.0f %9.3f) labels ("F-statistic (on TS, TR and interactions)" "p-value" "F-statistic (on CDD and interactions)" "p-value" "F-statistic (on TS, TR, CDD and interactions)" "p-value" "Observations" "Control Mean" )) ///
	style(fixed) ///
	title ("Table A5: Full Interaction Model") ///
	starlevels(* .1 ** .05 *** .01) ///
	mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score" "Won a Grant") ///
	keep(MS TR t1 MS_t1 TR_t1) 
		* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) specifications include strata for geographic ward and two balancing variables (distance to road and community size) from the original randomization; iii) robust standard errors; iv) outcomes coded to treatment arm mean for communities that did not submit a proposal in columns 2 to 5; v) outcomes in columns 2 to 4 are mean effects indices, expressed in standard deviation units, standardized with respect to the mean and standard deviation of Arm 1 in Figure 1 (see Kling, Liebman and Katz 2007); vi) outcomes in column 1 are an equally weighted index of those in columns 2 to 4; vii) outcomes in columns 5 and 6 are expressed in proportions; viii) the F-statistic and associated p-value evaluate the hypothesis that the listed terms are jointly equal to zero; and ix) the sample for all specifications includes all communities in Figure 1 (Arms 1 to 6).	
		
		
********************************************************************************
******************* Table A6: Technocratic Selection vs. CDD *******************
********************************************************************************
* Two tests are stored with each model: joint significance of MS and t1,
* and equality of the MS and t1 coefficients
eststo clear
foreach outcome in zscore_HII_cddsq techscore_cddsq expscore_cddsq policyscore_cddsq winner {
	eststo: xi: regress `outcome' MS t1  tothhs road i.r_ward, vce(robust)
	test MS t1 
	estadd scalar ftest_joint = r(F)
	estadd scalar pftest_joint = r(p)
	test MS=t1
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)
	quietly summarize `outcome' if MS==0 & t1==0
	estadd scalar controlmean = r(mean)
}
	
esttab using "tableA6.rtf", replace label ///
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
	stats(N ftest_joint pftest_joint ftest pftest controlmean, fmt( %9.0f %9.3f) labels ("Observations" "Fstat (on TS and CDD)" "pvalue" "Fstat TS=CDD" " pvalue" "Control Mean" )) ///
	style(fixed) ///
	title ("Table A6: Two Way Comparison of Technocratic Selection and CDD") ///
	starlevels(* .1 ** .05 *** .01) ///
	mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score" "Won a Grant" "Winner, 50th Percentile") ///
	keep(MS t1) 
	* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) specifications include strata for geographic ward and two balancing variables (distance to road and community size) from the original randomization; iii) robust standard errors; iv) outcomes coded to treatment arm mean for communities that did not submit a proposal in columns 2 to 5; v) outcomes in columns 2 to 4 are mean effects indices, expressed in standard deviation units, standardized with respect to the mean and standard deviation of Arm 1 in Figure 1 (see Kling, Liebman and Katz 2007); vi) outcomes in column 1 are an equally weighted index of those in columns 2 to 4; vii) the F-statistic and associated p-value evaluate the hypothesis that the listed terms are jointly equal to zero; and viii) outcomes in columns 5 and 6 are expressed in proportions.
	

********************************************************************************
************** Table A7: Effects for Simulated Winning Thresholds **************
********************************************************************************
* Find percentiles
summ meanscore, d

* Percentiles for simulated thresholds.
* The cut-offs below are hard-coded (presumably read off the detailed summary
* above - confirm if the data change).
* Stata treats missing values as larger than any number, so a bare
* "meanscore > cutoff" would flag observations with a missing meanscore as
* winners. They are explicitly coded 0 (not a winner) here.
gen winner25 = (meanscore > 44.5) & !missing(meanscore)
gen winner50 = (meanscore > 54.3) & !missing(meanscore)
gen winner75 = (meanscore > 62)   & !missing(meanscore)

* Regressions for different thresholds
* Each outcome is regressed on MS and TR with block fixed effects and robust
* standard errors. Stored with each model: joint F-test of MS and TR, the
* number of winners (sum of the 0/1 outcome) and the outcome mean where MS==0.
eststo clear
foreach var in winner winner25 winner50 winner75 {
	eststo: reg `var' MS TR i.block, robust
	* Displayed in the log only; the result is not stored
	su `var' if SQ==1
	test MS TR
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)
	* Count of winners under this threshold
	su  `var'
	estadd scalar winners = r(sum)
	quietly sum `var' if MS==0
	estadd scalar controlmean = r(mean)
}

esttab using "tableA7.rtf", replace label cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(N ftest pftest winners controlmean , fmt( %9.0f %9.3f) labels ("N" "Fstat" "pvalue" "# winners" "Control Mean" )) style(fixed) ///
title ("Table A7: Technocratic Selection Effects for Alternative Winning Thresholds") starlevels(* .1 ** .05 *** .01) mlabels("Winner" "Winner 25th Percentile" "Winner 50th Percentile" "Winner 75th Percentile") ///
keep(MS TR)
	* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) robust standard errors; iii) specifications include strata for geographic ward crossed with CDD assignment; iv) outcomes in column (2)-(4) are binary indicator for winning a grant at percentiles of the government proposal score distribution; v) the F-statistic and associated p-value evaluate the hypothesis that the listed terms are jointly equal to zero; and vi) the mean and standard deviation of each outcome for the excluded group in each specification. 
	

********************************************************************************
************** Table A8: Delegation to (Un)Trained Technocrats *****************
********************************************************************************
* Delegation by arm
* Within the MS==1 sample, t-test each delegation outcome across TR and keep
* the results as constant-valued columns: _1 and _2 hold the two group means,
* _3 the p-value and _4 the combined number of observations.
foreach dv in chiefsay chiefwrote chiefbudg chieftime {
	ttest `dv' if MS==1, by(TR)
	local k = 0
	foreach stat in mu_1 mu_2 p {
		local ++k
		gen tr_`dv'_`k' = r(`stat')
	}
	gen tr_`dv'_4 = r(N_1) + r(N_2)
}

su tr_chief*_* 

* Summarise by arm
* The stored results are identical on every row, so a single row is kept and
* reshaped long: one row per statistic (col = 1 to 4), one column per outcome.
preserve
	keep tr_chief*_*
	keep in 1
	gen i = 1

	reshape long tr_chiefsay_ tr_chiefwrote_ tr_chiefbudg_ tr_chieftime_ , i(i) j(col)

	label variable tr_chiefsay_ "Proportion where chiefly authorities chose the project"
	label variable tr_chiefwrote_  "Proportion where chiefly authorities wrote the description"
	label variable tr_chiefbudg_ "Proportion where chiefly authorities did the budget" 
	label variable tr_chieftime_ "Proportion where chiefly authorities set the timeline"

	* Tabulate the (single-row) mean by statistic and export
	eststo clear
	estpost tabstat tr_chiefsay_ tr_chiefwrote_ tr_chiefbudg_ tr_chieftime_, by(col) ///
		statistics(mean) columns(statistics) listwise not
	esttab using "tableA8.rtf", replace main(Mean) /// 
		nostar unstack noobs nonote nonumber nomtitles nodep label ///
		eqlabels("Technocratic Selection (arms 2,5)" "Training (arms 3,6)" "p-value on difference" "Observations")	
restore
	* Add Notes: outcomes capture the proportion of management decisions that were made by the village headman or other chiefly authorities in the community and compares technocrats with and without training. 

* Remove the helper columns from the analysis data
drop tr_chief*_*


********************************************************************************
************************ Table A9: Delegation Unpacked *************************
********************************************************************************
* Full Data: snapshot the analysis data so it can be reloaded afterwards
tempfile fulldata
save `fulldata', replace

* Treatment indicators & randomisation blocking vars
keep MS id_vill r_ward t1 
gen MS_t1 = MS*t1
label variable t1 "CDD"
label variable MS_t1 "Technocratic Selection * CDD"
tempfile treat
save `treat'

* Data from fuzzy matching, by name, of the managerial capital instrument
* and the submission instrument; original data cannot be made public
* See ReadMe and namematch_sub_mccap.do for details
use "public_submission_matched.dta", clear
merge m:1 id_vill using `treat', gen(_mer)

* Row filter applied to every regression below
* NOTE(review): the meaning of type==5 and pos is not visible here - see codebook
local leaderrows type==5 & (pos==1 | pos==.)

* Regression on matching names
eststo clear
preserve
	keep if `leaderrows'
	eststo: reg found MS t1 MS_t1 i.r_ward, robust
	quietly summarize found if MS==0 & t1==0
	estadd scalar depmean = r(mean)
	estadd scalar depsd = r(sd)
restore

* Regressions conditional on matching
keep if found==1
foreach depvar of varlist chief topscorer points educ {
	preserve
		keep if `leaderrows'
		eststo: reg `depvar' MS t1 MS_t1 i.r_ward, robust
		quietly summarize `depvar' if MS==0 & t1==0
		estadd scalar depmean = r(mean)
		estadd scalar depsd = r(sd)
	restore
}

esttab using "tableA9.rtf", replace label style(fixed) ///
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
	stats(N depmean, fmt( %9.0f %9.3f) labels("Observations" "Control Mean")) ///
	title("Table A9: Delegation Unpacked") starlevels(* .1 ** .05 *** .01) ///
	mlabels("Names Match" "Is Chief" "Top Scorer" "Points" "Education") ///
	keep(MS t1 MS_t1)
	* Add Notes: Notes i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01. ii) specifications include strata for geographic ward; iii) robust standard errors; iv) F-stat and associated p-value correspond to jointly testing that the three coefficients in each specification are equal to 0; v) Project leader is defined as the person who had the most say in choosing which project to propose; vii) the outcome in Column 1 is a dummy taking the value of 1 if the name of the project leader mentioned in the submission form is matched to those taking the managerial capital test, viii) estimates in Columns 2-5 are conditional on a match in Column 1 and report whether the project leader mentioned is the Chief (Column 2), whether they are the top scorer in the managerial capital test (Column 3), the managerial capital test score of the project leader (Column 4) and their years of education (Column 5); and ix) the bottom row includes the number of observations (communities), where out of 236 communities in the whole sample, 222 filled out the submission survey, and of these, 192 answered the question that allowed us to get a name for the project leader. For 149 communities, we can successfully match names to someone in the managerial capital test data (implying the leader was either the village headman or one of the 5 community nominees).
	

********************************************************************************
*********** Table A10: Management Training and "Teaching to the Test" **********
********************************************************************************
* Reload the full analysis data saved before the Table A9 section
use `fulldata', clear	

* Cleaning, Averaging
*replace tsc_b_4 = "." if tsc_b_4=="DK"
*replace tsc_c_5 = "." if tsc_c_5=="DK"
*foreach var in tsc_b_4 tsc_c_5 {
*	destring `var', replace
*	}

* Recode tsc_d_3 BEFORE building the row total so that ts is computed from the
* recoded item (previously the total was taken first and kept the value 2).
* NOTE(review): assumes a 2 on tsc_d_3 should count as 1 - confirm with codebook
replace tsc_d_3 = 1 if tsc_d_3 == 2
egen ts = rowtotal(tsc_b_1 tsc_b_2 tsc_b_3 tsc_b_4 tsc_b_5 tsc_c_1 tsc_c_2 tsc_c_3 tsc_c_4 tsc_c_5 tsc_c_6 tsc_d_1 tsc_d_2 tsc_d_3 tsc_d_4)
* rowtotal treats missing items as zero, so blank the total where tsc_b_1 is missing
replace ts = . if tsc_b_1==.

* "Copycat" Measures
label var tsc_b_5 "Mentions sustainability"
label var tsc_c_6 "Mentions multiple bids"
label var tsc_d_4 "Mentions skills needed"

* Performance Spillover Measures
label var tsc_b_3 "Says who will benefit"
label var tsc_c_3 "Says where items are bought"

* Standardize (scale to chief (SQ==1)) ignoring CDD
* Creates std_1 ... std_5 in the order of the varlist below:
*   std_1 = tsc_b_3, std_2 = tsc_c_3, std_3 = tsc_b_5, std_4 = tsc_c_6, std_5 = tsc_d_4
local l=0
foreach var of varlist tsc_b_3 tsc_c_3 tsc_b_5 tsc_c_6 tsc_d_4 { 
	local l = `l' + 1 
	* Control mean and SD
	qui sum `var' if SQ==1
	qui gen `var'_mean=r(mean) 
	qui gen `var'_sd=r(sd) 
	* Standardize relative to control mean and SD
	qui gen std_`l'=(`var'-`var'_mean)/`var'_sd 
	* Impute missing values 
	egen mean_std_`l'_t=mean( std_`l') if SQ==0 
	* Replacing missing values with mean standardized value for treated observations
	replace std_`l'=mean_std_`l'_t if SQ==0 & std_`l'==. 
	* Also replaces missing values with standardized value 
	* By construction is 0 for control group
	replace std_`l'=0 if SQ==1 & std_`l'==. 
	drop `var'_mean `var'_sd mean_std_`l'_t 	
} 

* Outcome: mean index across the performance spillover measures.
* The index is blanked wherever a component was originally missing, so the
* imputed std_ values never create an index for such observations.
egen zscore_perspill=rowmean(std_1 std_2) 
replace zscore_perspill=. if tsc_b_3==. | tsc_c_3==.
la var zscore_perspill "Index"
la var std_1 "Says who will benefit"
la var std_2 "Says where items are bought"

* Outcome: mean index across the copycat measures (same missing-value rule)
egen zscore_copycat=rowmean(std_3 std_4 std_5) 
replace zscore_copycat=. if tsc_b_5==. | tsc_c_6==. | tsc_d_4==.
la var zscore_copycat "Index"
la var std_3 "References sustainability"
la var std_4 "References multiple bids"
la var std_5 "References skills needed"

* Regressions
* Stored as est1-est3 (spillover items and index) and est4-est7 (copycat items
* and index); the export below lists the copycat models first.
eststo clear
foreach var in tsc_b_3 tsc_c_3 zscore_perspill tsc_b_5 tsc_c_6 tsc_d_4 zscore_copycat {
	eststo: reg `var' MS TR i.block, robust
	test MS+TR=0
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)
	quietly sum `var' if MS==0
	estadd scalar controlmean = r(mean)
}

esttab est4 est5 est6 est7 est1 est2 est3 using "tableA10.rtf", replace label cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(N controlmean, fmt( %9.0f %9.3f) labels ("N" "Control Mean" )) style(fixed) ///
title ("Table A10: Management Training and "Teaching to the Test"") starlevels(* .1 ** .05 *** .01) ///
keep (MS TR) note ("Notes i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01." ///
"Regressions include blocking fixed effect (Wards)")
	* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) specifications include fixed effects for geographic ward crossed with CDD assignment; iii) Panel A looks for evidence of "teaching to the test" by seeing whether trainees mechanically include reference in their proposals to topics covered by the training but not asked for on the application (e.g. the training emphasized the value of seeking multiple bids from contractors during project construction, a good practice for winners to use during implementation but not something that the application required, and column 2 shows that trainees were no more likely to include extraneous reference to it in their proposals); iv) Panel B takes the converse approach and evaluates whether the training had performance spillover effects on application questions that were not addressed in the training (e.g. the application asked for an explanation of who would benefit from the project, a topic not discussed during the training, and column 5 shows that trainees were no more conscientious in including explanation of who benefits in their proposal); and v) outcomes in columns 4 and 7 are summary indices for the multiple measures in each panel.
	
	
	
********************************************************************************
*************** Table A11: Infrastructure Assessment of Winners ****************
********************************************************************************
preserve
	* Data from the July 2018 field inspection of infrastructure 
	* projects that won an implementation grant from the government
	* competition.
	use "public_infra_data.dta", clear
	
	* For every outcome store the pooled mean, then run a two-sample t-test
	* for each assignment variable and store group means, p-value and group
	* sizes as constant-valued columns.
	* Each spec below reads: by-variable, name for group 2, name for group 1,
	* name for the difference.
	foreach outcome in function quality contribute near_chief {
		sum `outcome'
		gen mn_all_`outcome' = r(mean)
		
		foreach spec in "ms ms sq mdif" "t1 gbf ctl gdif" {
			tokenize `spec'
			ttest `outcome', by(`1')
			gen mn_`2'_`outcome' = r(mu_2)
			gen mn_`3'_`outcome' = r(mu_1)
			gen p_`4'_`outcome'  = r(p)
			gen n_`2'_`outcome'  = r(N_2)
			gen n_`3'_`outcome'  = r(N_1)
		}
	}
	
	* Table as dataset: keep one row of stored results and reshape so that
	* each outcome becomes a row (x holds the outcome suffix as a string)
	keep mn_* p_* n_*
	keep in 1
	gen n = 1
	reshape long mn_all mn_ms mn_sq p_mdif mn_gbf mn_ctl p_gdif n_ms n_sq n_gbf n_ctl, i(n) j(x) string
	drop n
	
	* Row order: function, quality, contribute, then the remaining outcome
	gen order = cond(x=="_function", 1, cond(x=="_quality", 2, cond(x=="_contribute", 3, 4)))
	sort order
	drop order
	
	* Differences in means for each assignment
	gen mdif = mn_ms - mn_sq
	gen gdif = mn_gbf - mn_ctl
	order x mn_all mn_ms mn_sq mdif p_mdif n_ms n_sq mn_gbf mn_ctl gdif p_gdif n_gbf n_ctl 
	outsheet using "tableA11.csv", comma replace
restore
	* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) data is from the July 2018 field inspection of infrastructure projects that won an implementation grant from government competition; iii) estimates displayed are from two-sided t-tests for each of the two distinct experimental assignments; and iv) The first outcome is a binary variable of whether the infrastructure project is present and functional, the second one is a categorical variable measuring the quality of construction where 1 means "poor" and 10 means "excellent", the third one is the total community financial contributions in US$, and the fourth one is a binary variable of whether the infrastructure is located near the chief's compound. 
	

********************************************************************************
***************** Table A12: Text Analysis of Proposal Content *****************
********************************************************************************
* Panel A controlling for the type of project (e.g. fixed effects for school, latrine, community center, etc.)
* to show that while GoBifo picked slightly different types of projects (as shown in A9) 
* this does not explain the performance results.
* The analysis data in memory is preserved and restored at the end; this
* section leaves behind two files on disk (temp_vill_overview.dta and
* temp_projtype.dta) that later sections use and then erase.
preserve
	
	* Remove pilot villages for merge
	use "public_vill_overview.dta", clear
	keep if !mi(id_vill) // Pilot villages
	save "temp_vill_overview.dta", replace
	
	* Was proposal content different in terms of inclusiveness?
	use "public_contentproposals.dta", clear
	drop if id_vill==.
	
	* Merge in treatment
	merge 1:1 id_vill using "temp_vill_overview.dta"
		label var MS "Technocratic Selection"
		label var t1 "CDD"
	ren _merge _merget
	
	* What type of projects were submitted?
	* A village counts as having submitted when inclusiveness is non-missing
	gen submit = 0 
		replace submit = 1 if inclusiveness!=.
		
	* Only mention or first project mentioned of two, else other
	* NOTE(review): an empty project string is also coded as a community
	* center by the last condition below - confirm this is intended
	gen commcntr = 0 if submit  == 1
	replace commcntr = 1 if project == "Community center" | project == "community center" | ///
			project == "community center rehabilitation" | project == "community barray " | ///
			project == "community barry" | project == "community barry (multi-purpose barry)" | ///
			project =="community center (multi purpose center)" | project == "community center rehabilitation " | ///
			project == "rehabilitation community barray" | project == "rehabilitation court barrie" | ///
			project == "repair community center" | project == "community barray and toilets" | ///
			project =="community barry and drying floor" | project =="community barry and well" | ///
			project ==" community center and grain store" | project == "community center and toilet" | ///
			project == "maintenance community center, drying .." | project == "rehabilitation community barry and to.." | ///
			project == "rehabilitation community center and d.." | project == ""
			
	gen educ = 0 if submit  == 1
	replace educ = 1 if project == "adding classrooms to school building" | project == "primary school"| ///
			project == "rehabilitation school "| project == "repair local school building" | ///
			project == "school" | project == "rehabilitation school and adding a cl.."
						
	gen water = 0 if submit == 1
	replace water = 1 if project =="water well" | project == "water well & community center" | ///
			project == "water well & toilet"
	
	* Residual category: submitted, but none of the three types above
	gen other = 1 if submit ==1
	replace other = 0 if water==1 | educ ==1 | commcntr ==1
	
	* Indicator for any mention of community institutions.
	* Stata treats missing as larger than any number, so "institutions>0" is
	* true when institutions is missing; the previous coding therefore set the
	* indicator to 1 for rows without proposal content. Such rows are now left
	* missing, in line with the project-type indicators.
	* NOTE(review): assumes a missing institutions count means no proposal
	* content rather than zero mentions - confirm
	gen institutiond = (institutions>0) if !missing(institutions)
	
	* Project-type indicators are undefined for villages without a submission
	foreach var in commcntr educ water other {
		replace `var' = . if submit==0
	}
	
	* Regressions: each outcome on CDD (t1) and technocratic selection (MS)
	* with ward dummies and the two balancing variables, robust standard errors
	eststo clear
		foreach var in inclusiveness institutiond commcntr educ water other  {
		*eststo: reg `var' t1 i.r_ward tothhs road, robust
		eststo: reg `var' t1 MS i.r_ward tothhs road, robust
		quietly sum `var' if MS==0 & t1==0
		estadd scalar controlmean = r(mean)
	}
	esttab using "tableA12.rtf", replace label cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) stats(N controlmean, fmt( %9.0f %9.3f) labels ("Observations" "Control Mean" )) style(fixed) ///
	title ("Table A12. Text Analysis of Proposal Content Across CDD Treatment") starlevels(* .1 ** .05 *** .01) mlabels("Proposal mentions inclusiveness terms" "Proposal mentions community institutions" "Community Center project" "Education project" "Water project " "Other project") ///
	keep (t1 MS)
		* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) specifications include strata for geographic ward and two balancing variables (distance to road and community size) from the randomization; and iii) robust standard errors. 
	
	* Store data for later
	keep id_vill commcntr educ water other
	save "temp_projtype.dta", replace	
restore


********************************************************************************
********** Table A13: Treatment Effects Controlling for Project Type ***********
********************************************************************************
* Bring in the project-type indicators saved to disk by the Table A12 section
merge 1:1 id_vill using "temp_projtype.dta"

* Project-type controls added to every specification (other is the omitted type)
local projtype commcntr educ water

* Each outcome is regressed on MS, t1 and their interaction plus the balancing
* variables, ward dummies and project-type controls, with robust standard
* errors. Stored with each model: joint test of MS and MS_t1, and the outcome
* mean in the cell where MS==0 and t1==0.
eststo clear
foreach outcome in zscore_HII_cddsq techscore_cddsq expscore_cddsq policyscore_cddsq winner  {
	eststo: reg `outcome' MS t1 MS_t1 tothhs road i.r_ward `projtype', robust

	test MS MS_t1 
	estadd scalar ftest = r(F)
	estadd scalar pftest = r(p)

	quietly summarize `outcome' if MS==0 & t1==0
	estadd scalar controlmean = r(mean)
}

* Export all stored models
esttab using "tableA13.rtf", replace label style(fixed) ///
	cells(b(star fmt(%9.3f)) se(par fmt(%9.3f))) ///
	stats(N ftest pftest controlmean, fmt( %9.0f %9.3f) labels("Observations" "Fstat" "pvalue" "Control Mean" )) ///
	title("Table A13: Technocratic Selection versus CDD Institutional Reform with project type controls") ///
	starlevels(* .1 ** .05 *** .01) ///
	mlabels("Proposal Score (index)" "Technical Score" "Expert Score" "Gov't Score" "Won a Grant" "Winner, 50th Percentile") ///
	keep(MS t1 MS_t1 `projtype')
	* Add Notes: i) significance levels indicated by * p < 0.10, ** p <0.05, *** p < 0.01; ii) robust standard errors; iii) project type fixed effects denote the sectoral type of project proposed by the community in the grants competition application; iv) specifications pool the technocratic selection and training arms together and include strata for geographic ward and two balancing variables (distance to road and community size) from the original randomization; v) outcomes in columns 2 to 4 are mean effects indices, expressed in standard deviation units, standardized with respect to the mean and standard deviation of control arm 1 (arms 1 and 4) in Figure 1 for Panel A (B) (see Kling, Liebman and Katz 2007); vi) missing scores for the 4 non-submitting communities are imputed at the respective treatment arm mean; vii) outcome in column 1 is an equally weighted index of those in columns 2 to 4; viii) outcome in column 5 is a binary indicator; ix) the F-statistic and associated p-value evaluate the hypothesis that the listed terms are jointly equal to zero; x) the mean and standard deviation of each outcome for the excluded group in each specification ; and xi) sample includes all communities in Figure 1. 
	
	

********************************************************************************
********************* Tidy Up: Temp files from analysis  ***********************
********************************************************************************
* Pause for one second before deleting, in case files are still being written
sleep 1000 // In case still writing to disk

* Delete the scratch datasets written to disk by the Table A12 section
foreach scratch in "temp_projtype.dta" "temp_vill_overview.dta" {
	erase "`scratch'"
}
